# -*- coding:utf-8 -*-
# @Time:2024/4/19 21:01
# @Author:miuzg
# @FileName:new test2.py
# @Software:PyCharm

from bs4 import BeautifulSoup

# Sample HTML document used as parser input below: a page with one table
# whose first row holds <th> header cells and whose remaining rows each hold
# a site name <td> and a <td> wrapping an <a href=...> link.
html = """
<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>包含 a、tr 和 td 标签的 HTML 示例</title>
</head>

<body>
    <h1>包含 a、tr 和 td 标签的表格</h1>
    <table border="1">
        <tr>
            <th>网站名称</th>
            <th>网站链接</th>
        </tr>
        <tr>
            <td>百度</td>
            <td><a href="https://www.baidu.com">访问百度</a></td>
        </tr>
        <tr>
            <td>谷歌</td>
            <td><a href="https://www.google.com">访问谷歌</a></td>
        </tr>
    </table>
</body>

</html>
"""
# Parse the sample document using the lxml parser backend.
soup = BeautifulSoup(html, features='lxml')

# Collect every <a> element in the document and print the whole result set.
a = soup.find_all(name='a')
print(a)

# Print each link's href twice to show the two equivalent access paths:
# item access on the tag itself, and lookup in the tag's attrs dict.
a_tag = soup.find_all(name='a')
for anchor in a_tag:
    href_from_item = anchor['href']
    href_from_attrs = anchor.attrs['href']
    print(href_from_item)
    print(href_from_attrs)

# Collect every table row, dropping the first one (the <th> header row of
# the sample table), and print the remaining rows.
tr = soup.find_all('tr')[1:]
print(tr)

# One dict per data row: site name plus the text of the link cell.
web_info = []
for row in tr:
    # Data cells of this row.
    cells = row.find_all('td')
    if len(cells) < 2:
        # A row without both a name cell and a link cell (e.g. another
        # header row made of <th>) cannot be indexed below; skip it instead
        # of raising IndexError.
        continue

    # get_text() rather than .string: .string is None as soon as a cell
    # contains more than one child node, while get_text() always returns
    # the cell's text as a str (empty string for an empty cell).
    web_name = cells[0].get_text()
    func = cells[1].get_text()

    web_info.append({
        '网站名字': web_name,
        '功能': func,
    })

    # Full text of the row, including the whitespace between its cells.
    print(row.get_text())

# Print the collected records, one per line.
for data in web_info:
    print(data)